### Creating raster data set
### 1 March 2023




# Preparation -------------------------------------------------------------

### load packages
library(foreign)
library(ggplot2)
library(tidyr)
library(dplyr)
library(maptools)
library(gridExtra)
library(ggmap)
library(data.table)
library(raster)
library(rgdal)
library(sp)
library(grid)
library(gridExtra)
library(Matching)
library(stargazer)
library(lmtest)
library(sandwich)
library(xtable)
library(geosphere)
library(readxl)
library(sf)
library(stars)
library(rgeos)



### Empty environment
rm(list=ls())

### Set working directory
path <- "" # Define path here
setwd(path)





# Prepare grid cells ------------------------------------------------------

### Load baseline shapefile: map of Europe in 1790 (taken from Abramson 2017)
shp1790 <- readOGR(paste(path,"/Raw data/Fragmentation Abramson 2017 shapefiles/poly_1790.shp",sep=""))
shp1790.sf <- st_as_sf(shp1790)

### Load shapefile of country borders in 1900 (to determine country)
shp1900 <- readOGR(paste(path,"/Raw data/Shapefile European countries 1900/Europe_1900_v.1.1.shp",sep=""))
shp1900 <- spTransform(shp1900, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 

### create 100x100 grid cells based on this map
grid100 = st_as_stars(st_bbox(shp1790.sf), dx = 100000, dy = 100000)
grid100 = st_as_sf(grid100)
grid100 = grid100[shp1790.sf, ]

### convert to shape files
grid100.shp <- as(grid100, "Spatial")

### add IDs
grid100.shp@data$ID <- 1:dim(grid100.shp@data)[1]
grid100.shp@data$values <- NULL



# Define countries --------------------------------------------------------

### 100x100kms grid cell
out <- over(grid100.shp, shp1900, returnList=TRUE)
grid100.ccodes <- data.frame(grid100.shp@data[,1])
colnames(grid100.ccodes) <- "ID"
grid100.ccodes$ccodes <- NA
grid100.ccodes$ccodesNAME <- NA
grid100.ccodes$country_Ottoman <- 0
grid100.ccodes$country_Austria <- 0
grid100.ccodes$country_Belgium <- 0
grid100.ccodes$country_Denmark <- 0
grid100.ccodes$country_England <- 0
grid100.ccodes$country_Finland <- 0
grid100.ccodes$country_France <- 0
grid100.ccodes$country_Germany <- 0
grid100.ccodes$country_Greece <- 0
grid100.ccodes$country_Ireland <- 0
grid100.ccodes$country_Italy <- 0
grid100.ccodes$country_Netherlands <- 0
grid100.ccodes$country_Norway <- 0
grid100.ccodes$country_Poland <- 0
grid100.ccodes$country_Portugal <- 0
grid100.ccodes$country_Russia <- 0
grid100.ccodes$country_Scotland <- 0
grid100.ccodes$country_Spain <- 0
grid100.ccodes$country_Sweden <- 0
grid100.ccodes$country_Switzerland <- 0
grid100.ccodes$country_Bulgaria <- 0
grid100.ccodes$country_Hungary <- 0
grid100.ccodes$country_Romania <- 0
grid100.ccodes$country_Serbia <- 0
grid100.ccodes$country_Iceland <- 0



for(i in 1:length(out)){
  if(dim(out[[i]])[1] == 0){
    grid100.ccodes$ccodes[i] <- NA
    
    grid100.ccodes$country_Ottoman[i] <- NA
    grid100.ccodes$country_Austria[i] <- NA
    grid100.ccodes$country_Belgium[i] <- NA
    grid100.ccodes$country_Denmark[i] <- NA
    grid100.ccodes$country_England[i] <- NA
    grid100.ccodes$country_Finland[i] <- NA
    grid100.ccodes$country_France[i] <- NA
    grid100.ccodes$country_Germany[i] <- NA
    grid100.ccodes$country_Greece[i] <- NA
    grid100.ccodes$country_Ireland[i] <- NA
    grid100.ccodes$country_Italy[i] <- NA
    grid100.ccodes$country_Netherlands[i] <- NA
    grid100.ccodes$country_Norway[i] <- NA
    grid100.ccodes$country_Poland[i] <- NA
    grid100.ccodes$country_Portugal[i] <- NA
    grid100.ccodes$country_Russia[i] <- NA
    grid100.ccodes$country_Scotland[i] <- NA
    grid100.ccodes$country_Spain[i] <- NA
    grid100.ccodes$country_Sweden[i] <- NA
    grid100.ccodes$country_Switzerland[i] <- NA
    grid100.ccodes$country_Bulgaria[i] <- NA
    grid100.ccodes$country_Hungary[i] <- NA
    grid100.ccodes$country_Romania[i] <- NA
    grid100.ccodes$country_Serbia[i] <- NA
    grid100.ccodes$country_Iceland[i] <- NA
  }
  if(dim(out[[i]])[1] > 0){
    agg.out <- aggregate(out[[i]][,"AREA"], by=list(out[[i]][,"COUNTRY"]), FUN=sum, na.rm=T)
    grid100.ccodes$ccodes[i] <- agg.out$Group.1[agg.out$x == max(agg.out$x)]
    
    grid100.ccodes$country_Ottoman[i][0 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Austria[i][10 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Belgium[i][31 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Denmark[i][40 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_England[i][50 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Finland[i][60 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_France[i][70 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Germany[i][80 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Greece[i][90 %in% agg.out$Group.1 | 91 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Ireland[i][100 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Italy[i][110 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Netherlands[i][120 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Norway[i][130 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Poland[i][140 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Portugal[i][150 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Russia[i][160 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Scotland[i][170 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Spain[i][181 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Sweden[i][190 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Switzerland[i][200 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Bulgaria[i][220 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Hungary[i][240 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Romania[i][250 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Serbia[i][260 %in% agg.out$Group.1] <- 1
    grid100.ccodes$country_Iceland[i][270 %in% agg.out$Group.1] <- 1
  }
}

grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 0] <- "Ottoman Empire"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 10] <- "Austria-Hungary"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 31] <- "Belgium"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 40] <- "Denmark"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 50] <- "England"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 60] <- "Finland"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 70] <- "France"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 80] <- "Germany"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 90] <- "Greece"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 91] <- "Greece"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 100] <- "Ireland"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 110] <- "Italy"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 120] <- "Netherlands"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 130] <- "Norway"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 140] <- "Poland"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 150] <- "Portugal"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 160] <- "Russia"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 170] <- "Scotland"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 181] <- "Spain"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 190] <- "Sweden"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 200] <- "Switzerland"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 220] <- "Bulgaria"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 240] <- "Hungary"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 250] <- "Romania"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 260] <- "Serbia"
grid100.ccodes$ccodesNAME[grid100.ccodes$ccodes == 270] <- "Iceland"






# Load relevant data ------------------------------------------------------

### Parliaments
parliaments <- read.dta(paste(path,"/Raw data/parliamentsVBB.dta", sep=""))

### Cities
cities.bairoch <- read.dta(paste(path,"/Raw data/citiesBairoch.dta", sep=""))
cities.bosker <- read.dta(paste(path,"/Raw data/citiesBosker.dta", sep=""))

### Universities
universities <- read.dta(paste(path,"/Raw data/universities.dta", sep=""))
universities <- subset(universities, founding <= 1850)
universities$year <- universities$founding

### Bishoprics
bish <- read.dta(paste(path,"/Raw data/bishopricsDARMC.dta", sep=""))

### Communes
communes <- read.dta(paste(path,"/Raw data/communeVBB.dta", sep=""))
communes$year[communes$year == 1800] <- 1790

### HRE Cantoni
CantoniHRE <- read.dta(paste(path,"/Raw data/HRECantoni.dta", sep=""))

### Dincecco and Onorato data
DO <- read.dta(paste(path,"/Raw data/Dincecco and Onorato 2016.dta", sep=""))
DO$year[DO$year == 1800] <- 1790

### Cantoni protestantism (city-level)
protestantism <- read.dta(paste(path,"/Raw data/protestantismCantoniCity.dta", sep=""))
protestantism$year[protestantism$year == 1800] <- 1790

### Rubin protestantism
protRubin <- read.dta(paste(path,"/Raw data/protestantismRubin.dta", sep=""))

### From state file: primogeniture, Protestantism, and papal conflict at the territory-level (Cantoni)
state <- read.dta(paste(path,"/State file/StateDataFinal.dta", sep=""))
state <- subset(state, select = c(Name, year, Cantoni_t_protestant_at_t, KS_primogeniture, conflicts_RA_forward_papal))

### Papal conflict
conflict <- read.dta(paste(path,"/Raw data/papalconflictBrecke.dta", sep=""))


# Write function ----------------------------------------------------------

### write function
assign <- function(year){
  # load shapefile
  if(year<1300){
    shp <- readOGR(paste(path,"/Raw data/Fragmentation Abramson 2017 shapefiles/",
                         year,"_poly.shp",sep=""))
  }
  if(year>=1300){
    shp <- readOGR(paste(path,"/Raw data/Fragmentation Abramson 2017 shapefiles/poly_",
                         year,".shp",sep=""))    
  }
  
  # change CRS
  shp <- spTransform(shp, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 

  # extract data (so we don't overwrite the original data)
  grid.shp <- grid100.shp
  
  # extract data
  d <- grid.shp@data
  
  # define year
  d$year <- year

  # count overlap
  d$NState <- sapply(over(grid.shp, shp, returnList=TRUE), nrow)
  
  # code primogeniture and territory-level protestantism
  intersection <- intersect(gBuffer(shp, byid=TRUE, width=0), gBuffer(grid.shp, byid=TRUE, width=0))
  d$KS_primogeniture <- NA
  d$Cantoni_t_protestant_at_t <- NA
  d$conflicts_RA_forward_papal <- NA  
  for(i in 1:dim(d)[1]){
    d$KS_primogeniture[i] <- mean(state$KS_primogeniture[state$Name %in% intersection@data$Name[intersection@data$ID == i] & state$year == year], na.rm=T)
    d$conflicts_RA_forward_papal[i] <- sum(state$conflicts_RA_forward_papal[state$Name %in% intersection@data$Name[intersection@data$ID == i] & state$year == year], na.rm=T)
    d$Cantoni_t_protestant_at_t[i] <- mean(state$Cantoni_t_protestant_at_t[state$Name %in% intersection@data$Name[intersection@data$ID == i] & state$year == year], na.rm=T)
  }
  d$KS_primogeniture[is.nan(d$KS_primogeniture)] <- NA
  d$Cantoni_t_protestant_at_t[is.nan(d$Cantoni_t_protestant_at_t)] <- NA

  # create subsets of the data & turn into spatial points
  cities.bairochALL.sub <- unique(cities.bairoch[(cities.bairoch$year_min <= year),c("lon","lat")])
  cities.bairochALL.sub <- SpatialPoints(cities.bairochALL.sub[!is.na(cities.bairochALL.sub$lon),])
  proj4string(cities.bairochALL.sub) <- CRS("+proj=longlat +datum=WGS84")
  cities.bairochALL.sub <- spTransform(cities.bairochALL.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))
  
  cities.bosker5k.sub <- unique(cities.bosker[cities.bosker$year <= year & cities.bosker$citypop_le5>0 & 
                                                cities.bosker$year == as.numeric(substr(year,1,2))*100,c("lon","lat")])
  cities.bosker5k.sub <- SpatialPoints(cities.bosker5k.sub[!is.na(cities.bosker5k.sub$lon),])
  proj4string(cities.bosker5k.sub) <- CRS("+proj=longlat +datum=WGS84")
  cities.bosker5k.sub <- spTransform(cities.bosker5k.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))
  
  parliaments.sub <- unique(parliaments[parliaments$year <= year  & 
                                          parliaments$year == as.numeric(substr(year,1,2))*100 & 
                                          parliaments$parliament==1,c("lon","lat")])
  parliaments.sub <- SpatialPoints(parliaments.sub[!is.na(parliaments.sub$lon),])
  proj4string(parliaments.sub) <- CRS("+proj=longlat +datum=WGS84")
  parliaments.sub <- spTransform(parliaments.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
  
  universitiesAll.sub <- universities[universities$year <= year,c("lon","lat")]
  universitiesAll.sub <- SpatialPoints(universitiesAll.sub[!is.na(universitiesAll.sub$lon),])
  proj4string(universitiesAll.sub) <- CRS("+proj=longlat +datum=WGS84")
  universitiesAll.sub <- spTransform(universitiesAll.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
  
  bish.sub <- bish[bish$year <= year,c("long","lat")]
  bish.sub <- SpatialPoints(bish.sub[!is.na(bish.sub$long),])
  proj4string(bish.sub) <- CRS("+proj=longlat +datum=WGS84")  
  bish.sub <- spTransform(bish.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))    
  
  if(year %in% c(1100,1200,1300,1400,1500,1600,1700,1790)){
    vbb.commune.sub <- communes[communes$VBB_commune_final == 1 & communes$year == year,c("lon", "lat")]
    vbb.commune.sub <- SpatialPoints(vbb.commune.sub[!is.na(vbb.commune.sub$lon),])  
    proj4string(vbb.commune.sub) <- CRS("+proj=longlat +datum=WGS84")  
    vbb.commune.sub <- spTransform(vbb.commune.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
    
    do.commune.sub <- DO[DO$DO_commune == 1 & DO$year == year,c("lon", "lat")]
    do.commune.sub <- SpatialPoints(do.commune.sub[!is.na(do.commune.sub$lon),])  
    proj4string(do.commune.sub) <- CRS("+proj=longlat +datum=WGS84")  
    do.commune.sub <- spTransform(do.commune.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
    
    do.dummy.all150.sub <- DO[DO$DO_dummy_all150 == 1 & DO$year == year, c("lon", "lat")]
    do.dummy.all150.sub <- SpatialPoints(do.dummy.all150.sub[!is.na(do.dummy.all150.sub$lon),])
    proj4string(do.dummy.all150.sub) <- CRS("+proj=longlat +datum=WGS84")  
    do.dummy.all150.sub <- spTransform(do.dummy.all150.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
  }
  
  if(year %in% c(1400,1450,1550,1650,1750)){
    CantoniHRE.sub <- CantoniHRE[CantoniHRE$time_point == year,c("longitude", "latitude")]
    CantoniHRE.sub <- SpatialPoints(CantoniHRE.sub[!is.na(CantoniHRE.sub$longitude),])
    proj4string(CantoniHRE.sub) <- CRS("+proj=longlat +datum=WGS84")  
    CantoniHRE.sub <- spTransform(CantoniHRE.sub, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
  }
  
  if(year %in% c(1100, 1200, 1300, 1400, 1500, 1600, 1700, 1750, 1790)){
    Cantoni_protestant.sub0 <- protestantism[protestantism$Cantoni_protestant_at_t == 0 & protestantism$year == year & !is.na(protestantism$Cantoni_protestant_at_t), c("lon", "lat")]
    Cantoni_protestant.sub0 <- SpatialPoints(Cantoni_protestant.sub0[!is.na(Cantoni_protestant.sub0$lon),])
    proj4string(Cantoni_protestant.sub0) <- CRS("+proj=longlat +datum=WGS84")  
    Cantoni_protestant.sub0 <- spTransform(Cantoni_protestant.sub0, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 
    
    Cantoni_protestant.sub1 <- protestantism[protestantism$Cantoni_protestant_at_t == 1 & protestantism$year == year & !is.na(protestantism$Cantoni_protestant_at_t), c("lon", "lat")]
    Cantoni_protestant.sub1 <- SpatialPoints(Cantoni_protestant.sub1[!is.na(Cantoni_protestant.sub1$lon),])
    proj4string(Cantoni_protestant.sub1) <- CRS("+proj=longlat +datum=WGS84")  
    Cantoni_protestant.sub1 <- spTransform(Cantoni_protestant.sub1, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 
  }
  
  # get city's protestantism status
  Rubin_prot1530.sub0 <- protRubin[protRubin$Rubin_prot1530 == 0 & protRubin$year == 1600 & !is.na(protRubin), c("lon", "lat")]
  Rubin_prot1530.sub0 <- SpatialPoints(Rubin_prot1530.sub0[!is.na(Rubin_prot1530.sub0$lon),])
  proj4string(Rubin_prot1530.sub0) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1530.sub0 <- spTransform(Rubin_prot1530.sub0, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))   
  
  Rubin_prot1530.sub1 <- protRubin[protRubin$Rubin_prot1530 == 1 & protRubin$year == 1600 & !is.na(protRubin$Rubin_prot1530), c("lon", "lat")]
  Rubin_prot1530.sub1 <- SpatialPoints(Rubin_prot1530.sub1[!is.na(Rubin_prot1530.sub1$lon),])
  proj4string(Rubin_prot1530.sub1) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1530.sub1 <- spTransform(Rubin_prot1530.sub1, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 
  
  Rubin_prot1560.sub0 <- protRubin[protRubin$Rubin_prot1560 == 0 & protRubin$year == 1600 & !is.na(protRubin$Rubin_prot1560), c("lon", "lat")]
  Rubin_prot1560.sub0 <- SpatialPoints(Rubin_prot1560.sub0[!is.na(Rubin_prot1560.sub0$lon),])
  proj4string(Rubin_prot1560.sub0) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1560.sub0 <- spTransform(Rubin_prot1560.sub0, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 
  
  Rubin_prot1560.sub1 <- protRubin[protRubin$Rubin_prot1560 == 1 & protRubin$year == 1600 & !is.na(protRubin$Rubin_prot1560), c("lon", "lat")]
  Rubin_prot1560.sub1 <- SpatialPoints(Rubin_prot1560.sub1[!is.na(Rubin_prot1560.sub1$lon),])
  proj4string(Rubin_prot1560.sub1) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1560.sub1 <- spTransform(Rubin_prot1560.sub1, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))  
  
  Rubin_prot1600.sub0 <- protRubin[protRubin$Rubin_prot1600 == 0 & protRubin$year == 1600 & !is.na(protRubin$Rubin_prot1600), c("lon", "lat")]
  Rubin_prot1600.sub0 <- SpatialPoints(Rubin_prot1600.sub0[!is.na(Rubin_prot1600.sub0$lon),])
  proj4string(Rubin_prot1600.sub0) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1600.sub0 <- spTransform(Rubin_prot1600.sub0, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))   
  
  Rubin_prot1600.sub1 <- protRubin[protRubin$Rubin_prot1600 == 1 & protRubin$year == 1600 & !is.na(protRubin$Rubin_prot1600), c("lon", "lat")]
  Rubin_prot1600.sub1 <- SpatialPoints(Rubin_prot1600.sub1[!is.na(Rubin_prot1600.sub1$lon),])
  proj4string(Rubin_prot1600.sub1) <- CRS("+proj=longlat +datum=WGS84")  
  Rubin_prot1600.sub1 <- spTransform(Rubin_prot1600.sub1, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs"))    

  # assign cities and buildings to grid cells
  d <- merge(d, data.frame(table(over(universitiesAll.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "UniversitiesAllN" 
  
  d <- merge(d, data.frame(table(over(parliaments.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- c("ParliamentsN")  
  
  d <- merge(d, data.frame(table(over(bish.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- c("BishopricsN")      
  
  d <- merge(d, data.frame(table(over(cities.bairochALL.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- c("CitiesBairochAllN")
  
  d <- merge(d, data.frame(table(over(cities.bosker5k.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- c("CitiesBosker5kN")
  
  # code missings as 0
  d$UniversitiesAllN[is.na(d$UniversitiesAllN)] <- 0
  d$ParliamentsN[is.na(d$ParliamentsN)] <- 0
  d$BishopricsN[is.na(d$BishopricsN)] <- 0
  d$CitiesBairochAllN[is.na(d$CitiesBairochAllN)] <- 0
  d$CitiesBosker5kN[is.na(d$CitiesBosker5kN)] <- 0
  
  if(year %in% c(1100,1200,1300,1400,1500,1600,1700,1790)){
    d <- merge(d, data.frame(table(over(vbb.commune.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- c("VBBCommuneN")
    
    d <- merge(d, data.frame(table(over(do.commune.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- c("DOCommuneN")  
    
    d <- merge(d, data.frame(table(over(do.dummy.all150.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- c("DOdummyall150N")

    # code missings as 0
    d$VBBCommuneN[is.na(d$VBBCommuneN)] <- 0
    d$DOCommuneN[is.na(d$DOCommuneN)] <- 0
    d$DOdummyall150N[is.na(d$DOdummyall150N)] <- 0
  }
  
  if(!(year %in% c(1100,1200,1300,1400,1500,1600,1700,1790))){
    # create empty variables for data that is not available
    d$VBBCommuneN <- NA
    d$DOCommuneN <- NA
    d$DOdummyall150N <- NA
  }
  
  # dummy: part of the HRE?
  d$HRE_Nuessli <- NA
  
  if(year %in% c(1100,1200,1300,1400,1500,1600,1700)){
    # load shape file
    hre <- readOGR(paste(path,"/Raw data/Nuessli shapefiles HRE/",
                         year,"/supranational_entities.shp",sep=""))
    
    # adjust CRS
    hre <- spTransform(hre, CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +units=m +no_defs")) 
    
    # overlap?
    d$HRE_Nuessli <- sapply(over(grid.shp, hre, returnList=TRUE), nrow)
  }
  
  # count number of HRE cities (Cantoni data)
  if(year %in% c(1400,1450,1550,1650,1750)){
    d <- merge(d, data.frame(table(over(CantoniHRE.sub, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- "HRE_Cantoni"  
    d$HRE_Cantoni[is.na(d$HRE_Cantoni)] <- 0
  }
  
  if(!(year %in% c(1400,1450,1550,1650,1750))){
    d$HRE_Cantoni <- NA
  }
  
  # Cantoni protestantism
  if(year %in% c(1100, 1200, 1300, 1400, 1500, 1600, 1700, 1750, 1790)){
    d <- merge(d, data.frame(table(over(Cantoni_protestant.sub0, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- "Cantoni_protestant_at_t0"  
    
    d <- merge(d, data.frame(table(over(Cantoni_protestant.sub1, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
    colnames(d)[dim(d)[2]] <- "Cantoni_protestant_at_t1"      
    
    d$Cantoni_protestant_at_t <- d$Cantoni_protestant_at_t1
    d$Cantoni_protestant_at_t[d$Cantoni_protestant_at_t0 >= 1 & is.na(d$Cantoni_protestant_at_t)] <- 0
    
    d$Cantoni_protestant_at_t_baseline <- d$Cantoni_protestant_at_t0 + d$Cantoni_protestant_at_t1
    d$Cantoni_protestant_at_t_baseline[is.na(d$Cantoni_protestant_at_t0)] <- d$Cantoni_protestant_at_t1[is.na(d$Cantoni_protestant_at_t0)]
    d$Cantoni_protestant_at_t_baseline[is.na(d$Cantoni_protestant_at_t1)] <- d$Cantoni_protestant_at_t0[is.na(d$Cantoni_protestant_at_t1)]
    
    d$Cantoni_protestant_at_t_share <- d$Cantoni_protestant_at_t / d$Cantoni_protestant_at_t_baseline
    
    d$Cantoni_protestant_at_t0 <- NULL
    d$Cantoni_protestant_at_t1 <- NULL
    d$Cantoni_protestant_at_t_baseline <- NULL
  }
  
  # Rubin protestantism in 1530
  d <- merge(d, data.frame(table(over(Rubin_prot1530.sub0, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1530_0" 
  
  d <- merge(d, data.frame(table(over(Rubin_prot1530.sub1, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1530_1" 
  
  d$Rubin_prot1530 <- d$Rubin_prot1530_1
  d$Rubin_prot1530[d$Rubin_prot1530_0 >= 1 & is.na(d$Rubin_prot1530)] <- 0  
  
  d$Rubin_prot1530_baseline <- d$Rubin_prot1530_0 + d$Rubin_prot1530_1
  d$Rubin_prot1530_baseline[is.na(d$Rubin_prot1530_0)] <- d$Rubin_prot1530_1[is.na(d$Rubin_prot1530_0)]
  d$Rubin_prot1530_baseline[is.na(d$Rubin_prot1530_1)] <- d$Rubin_prot1530_0[is.na(d$Rubin_prot1530_1)]    
  
  d$Rubin_prot1530_share <- d$Rubin_prot1530 / d$Rubin_prot1530_baseline
  
  d$Rubin_prot1530_0 <- NULL
  d$Rubin_prot1530_1 <- NULL
  d$Rubin_prot1530_baseline <- NULL
  
  # Rubin protestantism in 1560
  d <- merge(d, data.frame(table(over(Rubin_prot1560.sub0, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1560_0" 
  
  d <- merge(d, data.frame(table(over(Rubin_prot1560.sub1, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1560_1" 
  
  d$Rubin_prot1560 <- d$Rubin_prot1560_1
  d$Rubin_prot1560[d$Rubin_prot1560_0 >= 1 & is.na(d$Rubin_prot1560)] <- 0  
  
  d$Rubin_prot1560_baseline <- d$Rubin_prot1560_0 + d$Rubin_prot1560_1
  d$Rubin_prot1560_baseline[is.na(d$Rubin_prot1560_0)] <- d$Rubin_prot1560_1[is.na(d$Rubin_prot1560_0)]
  d$Rubin_prot1560_baseline[is.na(d$Rubin_prot1560_1)] <- d$Rubin_prot1560_0[is.na(d$Rubin_prot1560_1)]    
  
  d$Rubin_prot1560_share <- d$Rubin_prot1560 / d$Rubin_prot1560_baseline
  
  d$Rubin_prot1560_0 <- NULL
  d$Rubin_prot1560_1 <- NULL
  d$Rubin_prot1560_baseline <- NULL
  
  
  # Rubin protestantism in 1600
  d <- merge(d, data.frame(table(over(Rubin_prot1600.sub0, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1600_0" 
  
  d <- merge(d, data.frame(table(over(Rubin_prot1600.sub1, grid.shp)$ID)), by.x = "ID", by.y = "Var1", all.x=T)
  colnames(d)[dim(d)[2]] <- "Rubin_prot1600_1" 
  
  d$Rubin_prot1600 <- d$Rubin_prot1600_1
  d$Rubin_prot1600[d$Rubin_prot1600_0 >= 1 & is.na(d$Rubin_prot1600)] <- 0  
  
  d$Rubin_prot1600_baseline <- d$Rubin_prot1600_0 + d$Rubin_prot1600_1
  d$Rubin_prot1600_baseline[is.na(d$Rubin_prot1600_0)] <- d$Rubin_prot1600_1[is.na(d$Rubin_prot1600_0)]
  d$Rubin_prot1600_baseline[is.na(d$Rubin_prot1600_1)] <- d$Rubin_prot1600_0[is.na(d$Rubin_prot1600_1)]    
  
  d$Rubin_prot1600_share <- d$Rubin_prot1600 / d$Rubin_prot1600_baseline
  
  d$Rubin_prot1600_0 <- NULL
  d$Rubin_prot1600_1 <- NULL
  d$Rubin_prot1600_baseline <- NULL   
  

  if(!(year %in% c(1100, 1200, 1300, 1400, 1500, 1600, 1700, 1750, 1790))){
    d$Cantoni_protestant_at_t <- NA
    d$Cantoni_protestant_at_t_share <- NA
  }
  
  # output
  return(d)
}




# Implement function and create raster data -------------------------------

### implement function
# 1100
d <- assign(year = 1100)

# 1105-1790
for(year in seq(1105,1790,5)){
  print(year)
  d <- rbind(d, assign(year=year))
}


### add the country codes and names
d <- merge(d, grid100.ccodes, by="ID", all.x=T)

### Code Rubin protestantism
# count of cities
d$Rubin_prot <- NA
d$Rubin_prot[d$year < 1530 & !is.na(d$Rubin_prot1530)] <- 0
d$Rubin_prot[d$year >= 1530 & d$year < 1560] <- d$Rubin_prot1530[d$year >= 1530 & d$year < 1560]
d$Rubin_prot[d$year >= 1560 & d$year < 1600] <- d$Rubin_prot1560[d$year >= 1560 & d$year < 1600]
d$Rubin_prot[d$year >= 1600] <- d$Rubin_prot1600[d$year >= 1600]

# share of cities
d$Rubin_prot_share <- NA
d$Rubin_prot_share[d$year < 1530 & !is.na(d$Rubin_prot1530_share)] <- 0
d$Rubin_prot_share[d$year >= 1530 & d$year < 1560] <- d$Rubin_prot1530_share[d$year >= 1530 & d$year < 1560]
d$Rubin_prot_share[d$year >= 1560 & d$year < 1600] <- d$Rubin_prot1560_share[d$year >= 1560 & d$year < 1600]
d$Rubin_prot_share[d$year >= 1600] <- d$Rubin_prot1600_share[d$year >= 1600]

# drop Rubin protestant variables that are no longer needed
d$Rubin_prot1530 <- NULL
d$Rubin_prot1560 <- NULL
d$Rubin_prot1600 <- NULL

d$Rubin_prot1530_share <- NULL
d$Rubin_prot1560_share <- NULL
d$Rubin_prot1600_share <- NULL

### save data
write.dta(d, paste(path,"/Raster file/RasterDataFinal.dta", sep=""), convert.factors = "string")
